library(yaml)
library(foreign)
library(data.table)
library(tidyverse)
library(readstata13)

### Load paths
# Read the project-wide configuration. The file name is relative, so it is
# resolved against the working directory the script is run from.
CONFIG        <- yaml.load_file("config_global.yaml")
# Root directory of the original external data; main() reads the CSES .dta
# files from the "cses" sub-directory below it.
external_path <- CONFIG$source$externals$orig$root
# Output directory; main() writes cses_data.csv into it.
build_path    <- CONFIG$build$prepare_data
# NOTE(review): lib_path and helpers_path are not referenced anywhere else in
# this script as shown -- presumably kept for parity with sibling scripts;
# confirm before removing.
lib_path      <- CONFIG$source$lib
helpers_path  <- CONFIG$source$prepare_data$helpers

# Build the combined CSES extract: clean every country's rows from the
# integrated module dataset (and from module 5 where a weight is configured),
# stack the results and write them to cses_data.csv under build_path.
main <- function(){

  # Load Data
  imd_data  <- read.dta13(sprintf("%s/cses/cses_imd.dta", external_path))
  mod5_data <- read.dta13(sprintf("%s/cses/cses5.dta", external_path))

  countries <- c("Australia", "Canada", "Switzerland", "Germany", "Denmark", "France", "Great Britain", "Norway",
                 "New Zealand", "Sweden", "United States of America", "Japan")

  # Weight variable suffix per country; a nested list maps election years to
  # suffixes for countries where the weight variable differs across surveys.
  imd_weights <- list("Australia" = "1010_2", "Canada" = "1010_1", "Switzerland" = "1010_3", "Germany" = "1010_2",
                      "Denmark" = list("1998" = "1010_2", "2001" = "1010_1", "2007" = "1010_1"), "France" = "1010_2",
                      "Great Britain" = list("1997" = "1010_1", "2005" = "1010_2", "2015" = "1010_2"),  "Norway" = "1010_2",
                      "New Zealand" = "1010_3", "Sweden" = "1010_1",
                      "United States of America" = list("1996" = "1010_2", "2004" = "1010_2", "2008" = "1010_1", "2012" = "1010_2"),
                      "Japan" = list("1996" = "1010_1", "2004" = "1010_1", "2007" = "1010_1", "2013" = "1010_2"))

  mod5_weights <- list("Australia" = "1010_1", "Germany" = "1010_2", "France" = "1010_2",
                       "Norway" = "1010_2", "New Zealand" = "1010_2", "United States of America" = "1010_2")

  # Variable suffixes that are the same for every country; the country-specific
  # weight entry is appended inside the loop.
  imd_vars  <- list(year = "1008_YEAR", thermo = "3008_", party = "3005_3", party_num = "5000_",
                    party_sh = "5001_", partisan = "3005_1", ideology = "3006")
  mod5_vars <- list(year = "1008", thermo = "3017_", party = "3024_3", party_num = "5000_",
                    party_sh = "5001_", partisan = "3024_1", ideology = "3020")

  CSES <- c()
  for (country_name in countries){
    from_imd <- clean_cses(imd_data[imd_data$IMD1006_NAM == country_name, ],
                           prefix = "IMD", country = country_name,
                           vars = c(imd_vars, list(weight = imd_weights[[country_name]])))

    # Module 5 is only used for countries that have a weight configured.
    from_mod5 <- if (country_name %in% names(mod5_weights)){
      clean_cses(mod5_data[mod5_data$E1006_NAM == country_name, ],
                 prefix = "E", country = country_name,
                 vars = c(mod5_vars, list(weight = mod5_weights[[country_name]])))
    } else {
      NULL
    }

    CSES <- rbind(CSES, rbind(from_imd, from_mod5))
  }

  write.csv(CSES, sprintf("%s/cses_data.csv", build_path))
}

# Harmonise one country's slice of a CSES file into a common column layout.
#
# Arguments:
#   cses     data.frame with the rows of a single country.
#   prefix   variable-name prefix of the source file (main() passes "IMD" or "E").
#   country  country name; only used to trigger the Germany-specific row filter.
#   vars     named list of variable-name suffixes that are appended to `prefix`:
#            year, thermo, party, party_num, party_sh, partisan, ideology and
#            weight. `weight` is either a single suffix or a list that maps
#            election years to suffixes.
#
# Returns a data.frame with the columns id, party, leaner, has_leaner,
# ideology, weight, feel_party1..9, lower_house_party_shares1..9, year, country.
clean_cses <- function(cses, prefix, country, vars){
  party_letters <- c("A", "B", "C", "D", "E", "F", "G", "H", "I")

  # Party-identification labels that do not name a specific listed party.
  missing_party_labels <- c("9999999. MISSING",
                            "9999992. OTHER CANDIDATE/PARTY (NOT FURTHER SPECIFIED)",
                            "9999997. VOLUNTEERED: REFUSED",
                            "9999998. VOLUNTEERED: DON'T KNOW",
                            "9999988. NONE OF THE CANDIDATES PARTIES",
                            "9999989. INDEPENDENT CANDIDATE",
                            "9999990. OTHER LEFT WING CANDIDATE/PARTY",
                            "9999991. OTHER RIGHT WING CANDIDATE/PARTY",
                            "999999. MISSING",
                            "999998. VOLUNTEERED: DON'T KNOW",
                            "999997. VOLUNTEERED: REFUSED",
                            "999992. OTHER CANDIDATE/PARTY (NOT FURTHER SPECIFIED)",
                            "999989. INDEPENDENT CANDIDATE")

  # Setup
  cses$party_id <- cses[, sprintf("%s%s", prefix, vars$party)]
  cses$party_id[cses$party_id %in% missing_party_labels] <- NA

  cses$year <- cses[, sprintf("%s%s", prefix, vars$year)]
  if (length(vars$weight) == 1){
    # Weights are not consistently available across surveys.
    cses$weight <- cses[, sprintf("%s%s", prefix, vars$weight)]
  } else {
    # Year-specific weight variables: fill the rows of each listed year from
    # its own column; rows of years that are not listed keep NA.
    survey_year <- as.numeric(cses$year)
    for (k in names(vars$weight)){
      rows <- which(survey_year == as.numeric(k))
      cses[rows, "weight"] <- cses[rows, sprintf("%s%s", prefix, vars$weight[[k]])]
    }
  }
  cses$has_leaner  <- TRUE
  cses$country     <- as.character(cses[, sprintf("%s1006_NAM", prefix)])
  cses$id          <- cses[, sprintf("%s1005", prefix)]

  # Ideology values above 10 are treated as missing.
  cses$ideology    <- cses[, sprintf("%s%s", prefix, vars$ideology)]
  cses$ideology[which(cses$ideology > 10)] <- NA

  # Get feeling variables in correct order
  for (j in seq_along(party_letters)){
    feel <- cses[, sprintf("%s%s%s", prefix, vars$thermo, party_letters[j])] * 10
    # Recode on the extracted vector with which(): indexing the data frame by
    # a logical vector that contains NA makes `[<-.data.frame` stop, so one
    # missing rating would abort the whole run.
    feel[which(feel > 100)] <- NA
    cses[[sprintf("feel_party%s", j)]] <- feel

    cses[[sprintf("numerical_party%s", j)]] <-
      cses[, sprintf("%s%s%s", prefix, vars$party_num, party_letters[j])]
  }

  # Get party identification to match feeling variables
  is_partisan  <- as.character(cses[, sprintf("%s%s", prefix, vars$partisan)]) == "1. YES"
  party_id_chr <- as.character(cses$party_id)
  cses$party <- cses$leaner <- NA
  for (j in seq_along(party_letters)){
    same_party <- party_id_chr == as.character(cses[, sprintf("numerical_party%s", j)])
    # Respondents who did not answer "1. YES" go to `leaner`, the rest to `party`.
    cses$leaner[which(!is_partisan & same_party)] <- j
    cses$party[which(is_partisan & same_party)]   <- j
  }

  # Get party shares
  for (j in seq_along(party_letters)){
    share <- cses[, sprintf("%s%s%s", prefix, vars$party_sh, party_letters[j])] / 100
    # Shares above 1 are set to 0 (not NA); NA shares stay NA instead of
    # triggering the data-frame NA-subscript error.
    share[which(share > 1)] <- 0
    cses[[sprintf("lower_house_party_shares%s", j)]] <- share
  }

  # Drop East Germany
  if (country == "Germany"){
    # 1998: West Germany == 001
    # 2002: West Germany == 002
    # 2005/7/9: West Germany == 002
    # 2013: West Germany == 001
    # 2017: West Germany == 001
    region <- cses[, sprintf("%s1007", prefix)]
    west_as_002 <- region == "002. [SEE ELECTION STUDY NOTES]" & cses[, "year"] %in% c(2002, 2005, 2007, 2009)
    west_as_001 <- region == "001. [SEE ELECTION STUDY NOTES]" & cses[, "year"] %in% c(1998, 2013, 2017)
    cses <- cses[which(west_as_002 | west_as_001), ]
  }

  # Return
  cses <- cses[, c("id", "party", "leaner", "has_leaner", "ideology", "weight",
                   sprintf("feel_party%s", 1:9), sprintf("lower_house_party_shares%s", 1:9),
                   "year", "country")]

  return(cses)
}

# Run the whole pipeline as soon as the script is executed or sourced.
main()
